#! /usr/bin/python
#
# repcacheman ver 0.11 
#
# Cache Manager for Http-Replicator
# Deletes files in Portage's DISTDIR that duplicate files already in the cache.
# Imports authenticated (MD5 verified + listed in portage)
# files from DISTDIR into replicator's cache directory.
#
# MD5, and database routines ripped from Portage  
# All else, Copyright(C)2004 Tom Poplawski (poplawtm@earthlink.net)
# Distributed under the terms of the GNU General Public License v2
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.



import os,string,pwd,sys

if os.getuid():
	print"Must be root"
	exit(1)

# sys.path = ["/usr/lib/portage/pym"]+sys.path


# Perform the Checksum on a file
def perform_checksum(filename):
	import md5
	myfilename=filename
	f = open(myfilename, 'rb')
	blocksize=327680
	data = f.read(blocksize)
	sum = md5.new()
	while data:
		sum.update(data)
		data = f.read(blocksize)
	f.close()

	return (sum.hexdigest())


# Import http-replicator settings
execfile('/etc/http-replicator.conf')

user=USER
if user:
	try:
		uid=pwd.getpwnam(user)[2]
		gid=pwd.getpwnam(user)[3]
	except:
		print"User \'" + user + "\' Doesn't exist on system - edit config or add user to system."
		exit(1)
		

else:
	print "Error\n\tunable to get USER from /etc/http-replicator.conf"
	exit(1)
	
# DIR is replicator cache directory
dir=DIR+"/"
if dir:
	if os.path.isdir(dir):
		print "\n\nReplicator's cache directory: " + dir
		newdir=0
	else:
		print"\n\nBegin Http-Replicator Setup...."
		try:
			os.mkdir(dir)
			print "\tcreated " + dir
			newdir=1
		except:
			print "\tcreate " + dir + " failed"
			print '\terror:', sys.exc_info()[1]
			exit(1)
	if newdir:
		try:
			os.chown(dir,uid,gid)
			print "\tchanged owner of " + dir + " to " + user 
		except:
			print "\tchange owner " + dir + " to " + user + " failed:"
			print '\terror:', sys.exc_info()[1]


# Portage settings
import portage
distdir=portage.settings["DISTDIR"]+"/"
if distdir:
	print "Portage's DISTDIR: " + distdir
else:
	print"Unable to get Portage's DISTDIR"
	exit(1)

print "\nComparing directories...."

# Create filecmp object with 
import filecmp
dc=filecmp.dircmp (distdir,dir,['cvs-src','.locks'])
print "Done!"

#def __getattr__(self,common):
#print  dc.left_only

dupes=dc.common

deleted=0
if dupes:
	print "\nDeleting duplicate file(s) in " + distdir

	for s in dupes:
		print s
		try:
			os.remove(distdir + s )
			deleted +=1
		except:
			print "\tdelete " + distdir + s + " failed:"
			print '\terror:', sys.exc_info()[1]

	print "Done!"

newfiles=dc.left_only
if newfiles:
	print "\nNew files in DISTDIR:"
	for s in newfiles:
		print s
	print"\nChecking authenticity and integrity of new files..."
	print "Searching for ebuilds's ...."

	md5_list = {}

	# get all ebuilds? 
	ebuildlist = []
	for mycp in portage.db["/"]["porttree"].dbapi.cp_all():
		ebuildlist += portage.db["/"]["porttree"].dbapi.cp_list(mycp)
	ebuildlist.sort()

	print "Done!"
	print "\nFound " + str(len(ebuildlist)) + " ebuilds."

#	mycpv =app-admin/ulogd-0.89
#	pv = ulogd-0.89

	print "\nExtracting the checksums...."
	for mycpv in ebuildlist:
		pv = string.split(mycpv, "/")[-1]


		# lookup each ebuild digest and md5sums
		digestpath = portage.db["/"]["porttree"].dbapi.findname(mycpv)
		digestpath = os.path.dirname(digestpath)+"/files/digest-"+pv
		md5sums    = portage.digestParseFile(digestpath)
	
		if md5sums == None:
			portage.writemsg("Missing digest: %s\n" % mycpv)
			md5sums = {}

		for x in md5sums.keys():
			if x[0] == '/':
				del md5sums[x]
	

		for k, v in md5sums.iteritems():
			md5_list[k]= v
				


	del ebuildlist
	print "Done!\n\nVerifying checksum's...."
	
	added=0
	rejected=0
	suspect=0
	missing=0
	
	for file in newfiles:
		
		if not md5_list.has_key (file):
			print "\nWARNING " + file + " is not in portage!!!\n"
			suspect +=1
			continue
		myfile=distdir + file
		if os.path.isdir(myfile):
			print "\nskipping directory:",myfile
			continue
		mymd5=perform_checksum(myfile)

		print myfile 
#		print "md5 is " + mymd5

		t= md5_list[file]
		if t["MD5"]:
			if t["MD5"] == mymd5:
		 		print "MD5 OK"
				try:
					os.rename(distdir+file,dir+file)
					added += 1
				except:
					try:
						import shutil
						shutil.copyfile(distdir+file,dir+file)
						added += 1
						os.remove(distdir+file)
					except:
						print "\tmove/copy " + file + " failed:"
						print '\terror:', sys.exc_info()[1]
						continue
				try:
					os.chown(dir+file,uid,gid)
				except:
					print "\tchown " + file + " failed:"
					print '\terror:', sys.exc_info()[1]
			else:
				print "CORRUPT or INCOMPLETE "
				rejected +=1
		else:
			missing +=1
			print "\nEbuild missing digest for " +file
		
		
print "\nSUMMARY:\nFound " + str(len(dupes)) + " duplicate file(s)."
print "\tDeleted " + str(deleted) + " dupe(s)."
if newfiles:
	print "Found " + str(len(newfiles)) + " new file(s)."
	print "\tAdded " + str(added) + " of those file(s) to the cache."
	print "\tRejected " + str(rejected) + " corrupt or incomplete file(s)."
	if missing:
		print "\tCan't check " +str(missing) + " file(s) because of missing digest."
	if suspect:
		print "\t" + str(suspect) + " Unknown file(s) that are not listed in portage\
\n\tYou may want to delete them yourself...."
if newdir:
	print"\n\nexecute:\n/etc/init.d/http-replicator start"
	print"to run http-replicator.\nexecute:\nrc-update add http-replicator default"
	print"to make http-replicator start at boot"
	print"execute:\n/usr/bin/repcacheman\nfrequently to delete"
	print"dup files and add new files to the cache"
print "\n\nDone!\n\n"